library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.0     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Install the EHData helper package from GitHub only when it is not already
# available. Calling install_github() unconditionally contacts GitHub (and
# requires devtools) on every run of the script, even when nothing changes.
# NOTE(review): this no longer picks up new upstream commits automatically;
# run devtools::install_github("ericonsi/EHData") by hand to update.
if (!requireNamespace("EHData", quietly = TRUE)) {
  devtools::install_github("ericonsi/EHData")
}
## Skipping install of 'EHData' from a github remote, the SHA1 (ccd698bd) has not changed since last install.
##   Use `force = TRUE` to force installation
library(EHData)
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.0.5
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggsci)
## Warning: package 'ggsci' was built under R version 4.0.5
library(caret)
## Warning: package 'caret' was built under R version 4.0.5
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(pROC)
## Warning: package 'pROC' was built under R version 4.0.5
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(car)
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
# Load the crime training data and show per-column summary statistics.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs unchanged on a machine that has this exact directory layout.
df <- utils::read.csv(
  file = "D:\\RStudio\\CUNY_621\\Assignment 3\\crime-training-data_modified.csv"
)
summary(df)
##        zn             indus             chas              nox        
##  Min.   :  0.00   Min.   : 0.460   Min.   :0.00000   Min.   :0.3890  
##  1st Qu.:  0.00   1st Qu.: 5.145   1st Qu.:0.00000   1st Qu.:0.4480  
##  Median :  0.00   Median : 9.690   Median :0.00000   Median :0.5380  
##  Mean   : 11.58   Mean   :11.105   Mean   :0.07082   Mean   :0.5543  
##  3rd Qu.: 16.25   3rd Qu.:18.100   3rd Qu.:0.00000   3rd Qu.:0.6240  
##  Max.   :100.00   Max.   :27.740   Max.   :1.00000   Max.   :0.8710  
##        rm             age              dis              rad       
##  Min.   :3.863   Min.   :  2.90   Min.   : 1.130   Min.   : 1.00  
##  1st Qu.:5.887   1st Qu.: 43.88   1st Qu.: 2.101   1st Qu.: 4.00  
##  Median :6.210   Median : 77.15   Median : 3.191   Median : 5.00  
##  Mean   :6.291   Mean   : 68.37   Mean   : 3.796   Mean   : 9.53  
##  3rd Qu.:6.630   3rd Qu.: 94.10   3rd Qu.: 5.215   3rd Qu.:24.00  
##  Max.   :8.780   Max.   :100.00   Max.   :12.127   Max.   :24.00  
##       tax           ptratio         lstat             medv      
##  Min.   :187.0   Min.   :12.6   Min.   : 1.730   Min.   : 5.00  
##  1st Qu.:281.0   1st Qu.:16.9   1st Qu.: 7.043   1st Qu.:17.02  
##  Median :334.5   Median :18.9   Median :11.350   Median :21.20  
##  Mean   :409.5   Mean   :18.4   Mean   :12.631   Mean   :22.59  
##  3rd Qu.:666.0   3rd Qu.:20.2   3rd Qu.:16.930   3rd Qu.:25.00  
##  Max.   :711.0   Max.   :22.0   Max.   :37.970   Max.   :50.00  
##      target      
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.4914  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
# Show the column types and first values of the raw training data.
str(df)
## 'data.frame':    466 obs. of  13 variables:
##  $ zn     : num  0 0 0 30 0 0 0 0 0 80 ...
##  $ indus  : num  19.58 19.58 18.1 4.93 2.46 ...
##  $ chas   : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.605 0.871 0.74 0.428 0.488 0.52 0.693 0.693 0.515 0.392 ...
##  $ rm     : num  7.93 5.4 6.49 6.39 7.16 ...
##  $ age    : num  96.2 100 100 7.8 92.2 71.3 100 100 38.1 19.1 ...
##  $ dis    : num  2.05 1.32 1.98 7.04 2.7 ...
##  $ rad    : int  5 5 24 6 3 5 24 24 5 1 ...
##  $ tax    : int  403 403 666 300 193 384 666 666 224 315 ...
##  $ ptratio: num  14.7 14.7 20.2 16.6 17.8 20.9 20.2 20.2 20.2 16.4 ...
##  $ lstat  : num  3.7 26.82 18.85 5.19 4.82 ...
##  $ medv   : num  50 13.4 15.4 23.7 37.9 26.5 5 7 22.2 20.9 ...
##  $ target : int  1 1 1 0 0 0 1 1 0 0 ...
library(psych)
## Warning: package 'psych' was built under R version 4.0.5
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# EHData summary plots for df with "target" as the second argument and the
# "box" plot type. Presumably one box plot per column split by target --
# confirm against the EHData documentation.
EHSummarize_StandardPlots(df, "target", type = "box")

# Multicollinearity check on all columns of df. In the recorded run this
# loads corrplot and prints the full pairwise correlation matrix.
EHExplore_Multicollinearity(df, run_all = TRUE)
## corrplot 0.92 loaded

##                  zn       indus        chas         nox          rm         age
## zn       1.00000000 -0.53826643 -0.04016203 -0.51704518  0.31981410 -0.57258054
## indus   -0.53826643  1.00000000  0.06118317  0.75963008 -0.39271181  0.63958182
## chas    -0.04016203  0.06118317  1.00000000  0.09745577  0.09050979  0.07888366
## nox     -0.51704518  0.75963008  0.09745577  1.00000000 -0.29548972  0.73512782
## rm       0.31981410 -0.39271181  0.09050979 -0.29548972  1.00000000 -0.23281251
## age     -0.57258054  0.63958182  0.07888366  0.73512782 -0.23281251  1.00000000
## dis      0.66012434 -0.70361886 -0.09657711 -0.76888404  0.19901584 -0.75089759
## rad     -0.31548119  0.60062839 -0.01590037  0.59582984 -0.20844570  0.46031430
## tax     -0.31928408  0.73222922 -0.04676476  0.65387804 -0.29693430  0.51212452
## ptratio -0.39103573  0.39468980 -0.12866058  0.17626871 -0.36034706  0.25544785
## lstat   -0.43299252  0.60711023 -0.05142322  0.59624264 -0.63202445  0.60562001
## medv     0.37671713 -0.49617432  0.16156528 -0.43012267  0.70533679 -0.37815605
## target  -0.43168176  0.60485074  0.08004187  0.72610622 -0.15255334  0.63010625
##                 dis         rad         tax    ptratio       lstat       medv
## zn       0.66012434 -0.31548119 -0.31928408 -0.3910357 -0.43299252  0.3767171
## indus   -0.70361886  0.60062839  0.73222922  0.3946898  0.60711023 -0.4961743
## chas    -0.09657711 -0.01590037 -0.04676476 -0.1286606 -0.05142322  0.1615653
## nox     -0.76888404  0.59582984  0.65387804  0.1762687  0.59624264 -0.4301227
## rm       0.19901584 -0.20844570 -0.29693430 -0.3603471 -0.63202445  0.7053368
## age     -0.75089759  0.46031430  0.51212452  0.2554479  0.60562001 -0.3781560
## dis      1.00000000 -0.49499193 -0.53425464 -0.2333394 -0.50752800  0.2566948
## rad     -0.49499193  1.00000000  0.90646323  0.4714516  0.50310125 -0.3976683
## tax     -0.53425464  0.90646323  1.00000000  0.4744223  0.56418864 -0.4900329
## ptratio -0.23333940  0.47145160  0.47442229  1.0000000  0.37735605 -0.5159153
## lstat   -0.50752800  0.50310125  0.56418864  0.3773560  1.00000000 -0.7358008
## medv     0.25669476 -0.39766826 -0.49003287 -0.5159153 -0.73580078  1.0000000
## target  -0.61867312  0.62810492  0.61111331  0.2508489  0.46912702 -0.2705507
##              target
## zn      -0.43168176
## indus    0.60485074
## chas     0.08004187
## nox      0.72610622
## rm      -0.15255334
## age      0.63010625
## dis     -0.61867312
## rad      0.62810492
## tax      0.61111331
## ptratio  0.25084892
## lstat    0.46912702
## medv    -0.27055071
## target   1.00000000

# Copy of df with sign-flipped versions of zn and rm added as znM and rmM.
# NOTE(review): dfM is not referenced again in the visible part of the script.
dfM <- dplyr::mutate(df, znM = -zn, rmM = -rm)

#df <- EHPrepare_ScaleAllButTarget(df, "target")

# Add a numeric 0/1 indicator, ptOver13, that is 1 where ptratio exceeds 13,
# then inspect the resulting structure.
df22 <- dplyr::mutate(df, ptOver13 = as.numeric(ptratio > 13))

str(df22)
## 'data.frame':    466 obs. of  14 variables:
##  $ zn      : num  0 0 0 30 0 0 0 0 0 80 ...
##  $ indus   : num  19.58 19.58 18.1 4.93 2.46 ...
##  $ chas    : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ nox     : num  0.605 0.871 0.74 0.428 0.488 0.52 0.693 0.693 0.515 0.392 ...
##  $ rm      : num  7.93 5.4 6.49 6.39 7.16 ...
##  $ age     : num  96.2 100 100 7.8 92.2 71.3 100 100 38.1 19.1 ...
##  $ dis     : num  2.05 1.32 1.98 7.04 2.7 ...
##  $ rad     : int  5 5 24 6 3 5 24 24 5 1 ...
##  $ tax     : int  403 403 666 300 193 384 666 666 224 315 ...
##  $ ptratio : num  14.7 14.7 20.2 16.6 17.8 20.9 20.2 20.2 20.2 16.4 ...
##  $ lstat   : num  3.7 26.82 18.85 5.19 4.82 ...
##  $ medv    : num  50 13.4 15.4 23.7 37.9 26.5 5 7 22.2 20.9 ...
##  $ target  : int  1 1 1 0 0 0 1 1 0 0 ...
##  $ ptOver13: num  1 1 1 1 1 1 1 1 1 1 ...
# Interaction scatterplots from EHData, called with df22, "target" and the
# "ptOver13" flag. The recorded run prints a list of 14 plots, each drawn with
# geom_smooth(). Presumably one plot per remaining column, split by ptOver13 --
# confirm against the EHData documentation.
EHExplore_Interactions_Scatterplots(df22, "target", "ptOver13")
## [[1]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[2]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[3]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[4]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[5]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[6]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[7]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[8]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[9]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[10]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[11]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[12]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[13]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[14]]
## `geom_smooth()` using formula 'y ~ x'

# EHData box plots for df22 keyed on the "ptOver13" column; the recorded run
# prints a list of 15 plots. Presumably each continuous column is plotted
# against ptOver13 -- confirm against the EHData documentation.
EHExplore_OneContinuousAndOneCategoricalColumn_Boxplots(df22, "ptOver13")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

# EHData bar charts for df22 keyed on the "ptOver13" column; the recorded run
# prints a list of 14 plots.
EHExplore_TwoCategoricalColumns_Barcharts(df22, "ptOver13")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

# Cross-tabulate the response against the ptOver13 flag to see how many
# observations fall in each combination.
dftt <- df22[, c("target", "ptOver13")]

table(dftt)
##       ptOver13
## target   0   1
##      0   3 234
##      1  12 217
# Per-group means of lstat, medv and target for each value of ptOver13.
# mean() collapses every group to a single value. The previous ave() call
# returns one value per input row, so the "summary" kept all 466 rows with
# the group mean repeated instead of producing one row per group.
# .groups = "drop" returns an ungrouped tibble.
df222 <- df22 %>%
  group_by(ptOver13) %>%
  summarize(
    ave_lstat = mean(lstat),
    ave_medv = mean(medv),
    ave_target = mean(target),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'ptOver13'. You can override using the `.groups` argument.
# Display the grouped summary table.
print(df222)
## # A tibble: 466 x 4
## # Groups:   ptOver13 [2]
##    ptOver13 ave_lstat ave_medv ave_target
##       <dbl>     <dbl>    <dbl>      <dbl>
##  1        0      7.80     37.0        0.8
##  2        0      7.80     37.0        0.8
##  3        0      7.80     37.0        0.8
##  4        0      7.80     37.0        0.8
##  5        0      7.80     37.0        0.8
##  6        0      7.80     37.0        0.8
##  7        0      7.80     37.0        0.8
##  8        0      7.80     37.0        0.8
##  9        0      7.80     37.0        0.8
## 10        0      7.80     37.0        0.8
## # ... with 456 more rows
#a <- EHModel_Regression_Logistic(df, "target")

  # Baseline logistic regression: target modelled on every other column of
  # the unmodified training data.
  logistic_model <- glm(
    formula = target ~ .,
    family = "binomial",
    data = df
  )

str(df)
## 'data.frame':    466 obs. of  13 variables:
##  $ zn     : num  0 0 0 30 0 0 0 0 0 80 ...
##  $ indus  : num  19.58 19.58 18.1 4.93 2.46 ...
##  $ chas   : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.605 0.871 0.74 0.428 0.488 0.52 0.693 0.693 0.515 0.392 ...
##  $ rm     : num  7.93 5.4 6.49 6.39 7.16 ...
##  $ age    : num  96.2 100 100 7.8 92.2 71.3 100 100 38.1 19.1 ...
##  $ dis    : num  2.05 1.32 1.98 7.04 2.7 ...
##  $ rad    : int  5 5 24 6 3 5 24 24 5 1 ...
##  $ tax    : int  403 403 666 300 193 384 666 666 224 315 ...
##  $ ptratio: num  14.7 14.7 20.2 16.6 17.8 20.9 20.2 20.2 20.2 16.4 ...
##  $ lstat  : num  3.7 26.82 18.85 5.19 4.82 ...
##  $ medv   : num  50 13.4 15.4 23.7 37.9 26.5 5 7 22.2 20.9 ...
##  $ target : int  1 1 1 0 0 0 1 1 0 0 ...
  # Print the fitted call, coefficients and deviance/AIC of the baseline model.
  print(logistic_model)
## 
## Call:  glm(formula = target ~ ., family = "binomial", data = df)
## 
## Coefficients:
## (Intercept)           zn        indus         chas          nox           rm  
##  -40.822934    -0.065946    -0.064614     0.910765    49.122297    -0.587488  
##         age          dis          rad          tax      ptratio        lstat  
##    0.034189     0.738660     0.666366    -0.006171     0.402566     0.045869  
##        medv  
##    0.180824  
## 
## Degrees of Freedom: 465 Total (i.e. Null);  453 Residual
## Null Deviance:       645.9 
## Residual Deviance: 192   AIC: 218
 #mmps(logistic_model)

  #df22$ptratio = scale(df22$ptratio)
 
 # Refit the full model using only the rows with ptratio > 13.
 # ptOver13 equals 1 for every retained row, so it is dropped before fitting:
 # left in, it is perfectly collinear with the intercept and glm() reports its
 # coefficient as NA.
 df23 <- df22 %>%
   dplyr::filter(ptratio > 13) %>%
   dplyr::select(-ptOver13)

 m2 <- glm(target ~ .,
           data = df23,
           family = "binomial")
 print(m2)
## 
## Call:  glm(formula = target ~ ., family = "binomial", data = df23)
## 
## Coefficients:
## (Intercept)           zn        indus         chas          nox           rm  
##  -37.333521    -0.129787     0.004314     0.749600    38.875661    -0.716138  
##         age          dis          rad          tax      ptratio        lstat  
##    0.039393     0.878725     0.758820    -0.005851     0.450693     0.034661  
##        medv     ptOver13  
##    0.178087           NA  
## 
## Degrees of Freedom: 450 Total (i.e. Null);  438 Residual
## Null Deviance:       624.6 
## Residual Deviance: 186.3     AIC: 212.3
 # Marginal model plots for m2 (mmps() comes from the car package).
 # NOTE(review): the recorded run emits "fewer unique covariate combinations
 # than specified maximum degrees of freedom" errors from the smoother,
 # presumably for predictors with very few distinct values -- confirm which
 # terms trigger it.
 mmps(m2)
## Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) : 
##   A term has fewer unique covariate combinations than specified maximum degrees of freedom
## Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) : 
##   A term has fewer unique covariate combinations than specified maximum degrees of freedom

## Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) : 
##   A term has fewer unique covariate combinations than specified maximum degrees of freedom
## Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) : 
##   A term has fewer unique covariate combinations than specified maximum degrees of freedom

# Standardise every predictor (all columns except target) and refit the full
# logistic model so the coefficients are on a comparable scale.
# Columns are selected by name rather than by position 1:12, so the response
# cannot be scaled by accident if the column order changes. scale() returns a
# one-column matrix, so as.numeric() keeps each column a plain numeric vector.
# across() replaces the superseded mutate_at().
df2 <- df %>%
  mutate(across(-target, ~ as.numeric(scale(.x))))

EHModel_Regression_Logistic(df2, "target")
## Warning: package 'caTools' was built under R version 4.0.5
## Warning: package 'ROCR' was built under R version 4.0.5
## 
## Call:  glm(formula = fla, family = "binomial", data = df)
## 
## Coefficients:
## (Intercept)           zn        indus         chas          nox           rm  
##      2.3290      -1.5408      -0.4423       0.2339       5.7309      -0.4141  
##         age          dis          rad          tax      ptratio        lstat  
##      0.9683       1.5563       5.7880      -1.0362       0.8844       0.3258  
##        medv  
##      1.6708  
## 
## Degrees of Freedom: 465 Total (i.e. Null);  453 Residual
## Null Deviance:       645.9 
## Residual Deviance: 192   AIC: 218
## 
## Call:
## glm(formula = fla, family = "binomial", data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8464  -0.1445  -0.0017   0.0029   3.4665  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.3290     0.7195   3.237  0.00121 ** 
## zn           -1.5408     0.8097  -1.903  0.05706 .  
## indus        -0.4423     0.3260  -1.357  0.17485    
## chas          0.2339     0.1940   1.205  0.22803    
## nox           5.7309     0.9254   6.193 5.90e-10 ***
## rm           -0.4141     0.5095  -0.813  0.41637    
## age           0.9683     0.3912   2.475  0.01333 *  
## dis           1.5563     0.4852   3.208  0.00134 ** 
## rad           5.7880     1.4171   4.084 4.42e-05 ***
## tax          -1.0362     0.4961  -2.089  0.03674 *  
## ptratio       0.8844     0.2782   3.179  0.00148 ** 
## lstat         0.3258     0.3838   0.849  0.39608    
## medv          1.6708     0.6310   2.648  0.00810 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 645.88  on 465  degrees of freedom
## Residual deviance: 192.05  on 453  degrees of freedom
## AIC: 218.05
## 
## Number of Fisher Scoring iterations: 9
## 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 60  5
##          1  1 41
##                                           
##                Accuracy : 0.9439          
##                  95% CI : (0.8819, 0.9791)
##     No Information Rate : 0.5701          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8844          
##                                           
##  Mcnemar's Test P-Value : 0.2207          
##                                           
##             Sensitivity : 0.9836          
##             Specificity : 0.8913          
##          Pos Pred Value : 0.9231          
##          Neg Pred Value : 0.9762          
##              Prevalence : 0.5701          
##          Detection Rate : 0.5607          
##    Detection Prevalence : 0.6075          
##       Balanced Accuracy : 0.9375          
##                                           
##        'Positive' Class : 0               
## 
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## [1] "AUC:  0.982537419814683"
## 
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg,     plot = TRUE)
## 
## Data: dfPred_raw$predict_reg in 61 controls (dfPred_raw$class 0) < 46 cases (dfPred_raw$class 1).
## Area under the curve: 0.9825
## 
## Call:  glm(formula = fla, family = "binomial", data = df)
## 
## Coefficients:
## (Intercept)           zn        indus         chas          nox           rm  
##      2.3290      -1.5408      -0.4423       0.2339       5.7309      -0.4141  
##         age          dis          rad          tax      ptratio        lstat  
##      0.9683       1.5563       5.7880      -1.0362       0.8844       0.3258  
##        medv  
##      1.6708  
## 
## Degrees of Freedom: 465 Total (i.e. Null);  453 Residual
## Null Deviance:       645.9 
## Residual Deviance: 192   AIC: 218
# Reduced model: keep only nox, dis, rad, ptratio and medv (the predictors
# with p < 0.01 in the full-model summary printed earlier) plus the response.
df1 <- df[, c("nox", "dis", "rad", "ptratio", "medv", "target")]

EHModel_Regression_Logistic(df1, "target")
## 
## Call:  glm(formula = fla, family = "binomial", data = df)
## 
## Coefficients:
## (Intercept)          nox          dis          rad      ptratio         medv  
##   -31.27121     37.37652      0.29535      0.51558      0.28586      0.08635  
## 
## Degrees of Freedom: 465 Total (i.e. Null);  460 Residual
## Null Deviance:       645.9 
## Residual Deviance: 225.3     AIC: 237.3
## 
## Call:
## glm(formula = fla, family = "binomial", data = df)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.06137  -0.31295  -0.04733   0.00705   2.81210  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -31.27121    4.82619  -6.479 9.20e-11 ***
## nox          37.37652    5.56582   6.715 1.88e-11 ***
## dis           0.29535    0.14902   1.982  0.04748 *  
## rad           0.51558    0.11531   4.471 7.77e-06 ***
## ptratio       0.28586    0.09877   2.894  0.00380 ** 
## medv          0.08635    0.02832   3.050  0.00229 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 645.88  on 465  degrees of freedom
## Residual deviance: 225.32  on 460  degrees of freedom
## AIC: 237.32
## 
## Number of Fisher Scoring iterations: 8
## 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 72 12
##          1  2 69
##                                           
##                Accuracy : 0.9097          
##                  95% CI : (0.8531, 0.9497)
##     No Information Rate : 0.5226          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.82            
##                                           
##  Mcnemar's Test P-Value : 0.01616         
##                                           
##             Sensitivity : 0.9730          
##             Specificity : 0.8519          
##          Pos Pred Value : 0.8571          
##          Neg Pred Value : 0.9718          
##              Prevalence : 0.4774          
##          Detection Rate : 0.4645          
##    Detection Prevalence : 0.5419          
##       Balanced Accuracy : 0.9124          
##                                           
##        'Positive' Class : 0               
## 
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## [1] "AUC:  0.97630964297631"
## 
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg,     plot = TRUE)
## 
## Data: dfPred_raw$predict_reg in 74 controls (dfPred_raw$class 0) < 81 cases (dfPred_raw$class 1).
## Area under the curve: 0.9763
## 
## Call:  glm(formula = fla, family = "binomial", data = df)
## 
## Coefficients:
## (Intercept)          nox          dis          rad      ptratio         medv  
##   -31.27121     37.37652      0.29535      0.51558      0.28586      0.08635  
## 
## Degrees of Freedom: 465 Total (i.e. Null);  460 Residual
## Null Deviance:       645.9 
## Residual Deviance: 225.3     AIC: 237.3
# Add a 0/1 flag for tax >= 600 and draw the EHData interaction scatterplots
# using that flag.
dfT <- dplyr::mutate(df, TaxOver600 = as.numeric(tax >= 600))

EHExplore_Interactions_Scatterplots(dfT, "target", "TaxOver600")
## [[1]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[2]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[3]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[4]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[5]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[6]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[7]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[8]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[9]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[10]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[11]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[12]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[13]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[14]]
## `geom_smooth()` using formula 'y ~ x'

# Add a 0/1 flag for indus >= 16 and show the interaction scatterplots for
# that flag combined into one patchwork layout.
dfT2 <- dplyr::mutate(df, IndusOver16 = as.numeric(indus >= 16))

wrap_plots(EHExplore_Interactions_Scatterplots(dfT2, "target", "IndusOver16"))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# Extend dfT (which already carries TaxOver600) with:
#   lstatOver12            - 0/1 flag for lstat > 12
#   Inter_taxOver600_lstat - TaxOver600 x lstat interaction
#   Inter_lstatOver12_medv - lstatOver12 x medv interaction
#   IndusOver16            - 0/1 flag for indus >= 16
df4 <- dfT %>%
  mutate(
    lstatOver12 = as.numeric(lstat > 12),
    Inter_taxOver600_lstat = TaxOver600 * lstat,
    Inter_lstatOver12_medv = lstatOver12 * medv,
    IndusOver16 = as.numeric(indus >= 16)
  )

#EHExplore_TwoCategoricalColumns_Barcharts(dfT, "TaxOver600")
wrap_plots(EHExplore_Interactions_Scatterplots(df4, "target", "lstatOver12"))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# Fit the logistic model on df4 (all predictors plus the engineered flags and
# interactions) and pass the result to plot(). Presumably the helper returns
# the fitted glm, so this draws the standard glm diagnostic plots -- confirm.
# NOTE(review): the hold-out set size in the recorded confusion matrices
# differs between calls, so the train/test split appears to be random;
# consider set.seed() for reproducibility -- confirm against EHData.
plot(EHModel_Regression_Logistic(df4, "target")) 
## 
## Call:  glm(formula = fla, family = "binomial", data = df)
## 
## Coefficients:
##            (Intercept)                      zn                   indus  
##             -39.437128               -0.057955               -0.112958  
##                   chas                     nox                      rm  
##               1.373814               48.225024               -0.986275  
##                    age                     dis                     rad  
##               0.023800                0.678863                0.588571  
##                    tax                 ptratio                   lstat  
##              -0.001163                0.396786                0.136516  
##                   medv              TaxOver600             lstatOver12  
##               0.230639               -1.293461                3.463820  
## Inter_taxOver600_lstat  Inter_lstatOver12_medv             IndusOver16  
##              -0.273658               -0.237180                1.431433  
## 
## Degrees of Freedom: 465 Total (i.e. Null);  448 Residual
## Null Deviance:       645.9 
## Residual Deviance: 176.8     AIC: 212.8
## 
## Call:
## glm(formula = fla, family = "binomial", data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7834  -0.1444  -0.0034   0.0273   3.6843  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -39.437128   7.161435  -5.507 3.65e-08 ***
## zn                      -0.057955   0.033018  -1.755 0.079217 .  
## indus                   -0.112958   0.098781  -1.144 0.252821    
## chas                     1.373814   0.845214   1.625 0.104076    
## nox                     48.225024   8.059928   5.983 2.19e-09 ***
## rm                      -0.986275   0.796630  -1.238 0.215694    
## age                      0.023800   0.014601   1.630 0.103098    
## dis                      0.678863   0.238467   2.847 0.004416 ** 
## rad                      0.588571   0.156077   3.771 0.000163 ***
## tax                     -0.001163   0.004025  -0.289 0.772548    
## ptratio                  0.396786   0.136976   2.897 0.003770 ** 
## lstat                    0.136516   0.081492   1.675 0.093891 .  
## medv                     0.230639   0.078249   2.947 0.003204 ** 
## TaxOver600              -1.293461  13.243964  -0.098 0.922199    
## lstatOver12              3.463820   2.216992   1.562 0.118195    
## Inter_taxOver600_lstat  -0.273658   0.678037  -0.404 0.686504    
## Inter_lstatOver12_medv  -0.237180   0.103369  -2.294 0.021762 *  
## IndusOver16              1.431433   1.488158   0.962 0.336109    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 645.88  on 465  degrees of freedom
## Residual deviance: 176.79  on 448  degrees of freedom
## AIC: 212.79
## 
## Number of Fisher Scoring iterations: 10
## 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 42  7
##          1  6 48
##                                           
##                Accuracy : 0.8738          
##                  95% CI : (0.7938, 0.9311)
##     No Information Rate : 0.534           
##     P-Value [Acc > NIR] : 1.872e-13       
##                                           
##                   Kappa : 0.7467          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.8750          
##             Specificity : 0.8727          
##          Pos Pred Value : 0.8571          
##          Neg Pred Value : 0.8889          
##              Prevalence : 0.4660          
##          Detection Rate : 0.4078          
##    Detection Prevalence : 0.4757          
##       Balanced Accuracy : 0.8739          
##                                           
##        'Positive' Class : 0               
## 
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## [1] "AUC:  0.964015151515151"
## 
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg,     plot = TRUE)
## 
## Data: dfPred_raw$predict_reg in 48 controls (dfPred_raw$class 0) < 55 cases (dfPred_raw$class 1).
## Area under the curve: 0.964

# Drop the observation whose row name is "338" before refitting.
df5 <- dplyr::filter(df4, rownames(df4) != "338")

# Log-transform zn (shifted by 1 so zeros stay finite) and leave out the
# IndusOver16 flag for this model.
df6 <- df5 %>%
  mutate(zn = log(zn + 1)) %>%
  dplyr::select(-IndusOver16)


plot(EHModel_Regression_Logistic(df6, "target"))
## 
## Call:  glm(formula = fla, family = "binomial", data = df)
## 
## Coefficients:
##            (Intercept)                      zn                   indus  
##             -4.669e+01              -6.879e-01              -4.388e-02  
##                   chas                     nox                      rm  
##              1.170e+00               5.695e+01              -1.279e+00  
##                    age                     dis                     rad  
##              3.427e-02               9.827e-01               6.641e-01  
##                    tax                 ptratio                   lstat  
##             -7.533e-04               3.948e-01               1.623e-01  
##                   medv              TaxOver600             lstatOver12  
##              2.970e-01              -1.398e+00               3.418e+00  
## Inter_taxOver600_lstat  Inter_lstatOver12_medv  
##             -2.948e-01              -2.526e-01  
## 
## Degrees of Freedom: 464 Total (i.e. Null);  448 Residual
## Null Deviance:       644.5 
## Residual Deviance: 162   AIC: 196
## 
## Call:
## glm(formula = fla, family = "binomial", data = df)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.68080  -0.11809  -0.00483   0.01816   2.58674  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -4.669e+01  8.034e+00  -5.811 6.21e-09 ***
## zn                     -6.879e-01  3.002e-01  -2.292 0.021927 *  
## indus                  -4.388e-02  5.699e-02  -0.770 0.441294    
## chas                    1.170e+00  8.761e-01   1.336 0.181557    
## nox                     5.695e+01  9.078e+00   6.273 3.54e-10 ***
## rm                     -1.279e+00  8.428e-01  -1.518 0.129035    
## age                     3.427e-02  1.595e-02   2.149 0.031637 *  
## dis                     9.827e-01  2.818e-01   3.487 0.000488 ***
## rad                     6.641e-01  1.731e-01   3.837 0.000125 ***
## tax                    -7.533e-04  4.271e-03  -0.176 0.859987    
## ptratio                 3.948e-01  1.494e-01   2.643 0.008218 ** 
## lstat                   1.623e-01  8.504e-02   1.909 0.056248 .  
## medv                    2.970e-01  8.394e-02   3.538 0.000403 ***
## TaxOver600             -1.398e+00  1.638e+01  -0.085 0.931975    
## lstatOver12             3.418e+00  2.294e+00   1.489 0.136359    
## Inter_taxOver600_lstat -2.948e-01  8.650e-01  -0.341 0.733264    
## Inter_lstatOver12_medv -2.526e-01  1.071e-01  -2.360 0.018293 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 644.45  on 464  degrees of freedom
## Residual deviance: 161.98  on 448  degrees of freedom
## AIC: 195.98
## 
## Number of Fisher Scoring iterations: 10
## 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 55  1
##          1  3 52
##                                           
##                Accuracy : 0.964           
##                  95% CI : (0.9103, 0.9901)
##     No Information Rate : 0.5225          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9279          
##                                           
##  Mcnemar's Test P-Value : 0.6171          
##                                           
##             Sensitivity : 0.9483          
##             Specificity : 0.9811          
##          Pos Pred Value : 0.9821          
##          Neg Pred Value : 0.9455          
##              Prevalence : 0.5225          
##          Detection Rate : 0.4955          
##    Detection Prevalence : 0.5045          
##       Balanced Accuracy : 0.9647          
##                                           
##        'Positive' Class : 0               
## 
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## [1] "AUC:  0.993819128171763"
## 
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg,     plot = TRUE)
## 
## Data: dfPred_raw$predict_reg in 58 controls (dfPred_raw$class 0) < 53 cases (dfPred_raw$class 1).
## Area under the curve: 0.9938

# Reduced model: keep the response and a subset of predictors from df5, then
# add `inter`, the product of `indus` and the 0/1 flag `IndusOver16`, so the
# indus slope may differ on either side of the threshold.
# NOTE(review): the fit printed below warns that fitted probabilities of 0 or 1
# occurred and reports very large standard errors for IndusOver16 and inter --
# check for (quasi-)separation before interpreting those two coefficients.
df11 <- mutate(
  dplyr::select(
    df5,
    target, zn, nox, age, dis, rad, ptratio, medv, indus, IndusOver16
  ),
  inter = indus * IndusOver16
)

# Fit and report the logistic regression of `target` on every other column.
EHModel_Regression_Logistic(df11, "target")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Call:  glm(formula = fla, family = "binomial", data = df)
## 
## Coefficients:
## (Intercept)           zn          nox          age          dis          rad  
##   -42.01114     -0.07453     44.50486      0.03490      0.77365      0.53383  
##     ptratio         medv        indus  IndusOver16        inter  
##     0.40204      0.13838     -0.05919    152.54358     -6.95711  
## 
## Degrees of Freedom: 464 Total (i.e. Null);  454 Residual
## Null Deviance:       644.5 
## Residual Deviance: 174.5     AIC: 196.5
## 
## Call:
## glm(formula = fla, family = "binomial", data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0340  -0.1419   0.0000   0.0000   3.1217  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -42.01114    7.17696  -5.854 4.81e-09 ***
## zn            -0.07453    0.03557  -2.095 0.036129 *  
## nox           44.50486    7.85438   5.666 1.46e-08 ***
## age            0.03490    0.01153   3.027 0.002467 ** 
## dis            0.77365    0.22698   3.408 0.000653 ***
## rad            0.53383    0.13471   3.963 7.41e-05 ***
## ptratio        0.40204    0.13231   3.039 0.002377 ** 
## medv           0.13838    0.03959   3.495 0.000474 ***
## indus         -0.05919    0.09302  -0.636 0.524582    
## IndusOver16  152.54358 9215.00483   0.017 0.986793    
## inter         -6.95711  420.96877  -0.017 0.986814    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 644.45  on 464  degrees of freedom
## Residual deviance: 174.55  on 454  degrees of freedom
## AIC: 196.55
## 
## Number of Fisher Scoring iterations: 20
## 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 60  7
##          1  3 57
##                                         
##                Accuracy : 0.9213        
##                  95% CI : (0.86, 0.9616)
##     No Information Rate : 0.5039        
##     P-Value [Acc > NIR] : <2e-16        
##                                         
##                   Kappa : 0.8426        
##                                         
##  Mcnemar's Test P-Value : 0.3428        
##                                         
##             Sensitivity : 0.9524        
##             Specificity : 0.8906        
##          Pos Pred Value : 0.8955        
##          Neg Pred Value : 0.9500        
##              Prevalence : 0.4961        
##          Detection Rate : 0.4724        
##    Detection Prevalence : 0.5276        
##       Balanced Accuracy : 0.9215        
##                                         
##        'Positive' Class : 0             
## 
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## [1] "AUC:  0.976686507936508"
## 
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg,     plot = TRUE)
## 
## Data: dfPred_raw$predict_reg in 63 controls (dfPred_raw$class 0) < 64 cases (dfPred_raw$class 1).
## Area under the curve: 0.9767
## 
## Call:  glm(formula = fla, family = "binomial", data = df)
## 
## Coefficients:
## (Intercept)           zn          nox          age          dis          rad  
##   -42.01114     -0.07453     44.50486      0.03490      0.77365      0.53383  
##     ptratio         medv        indus  IndusOver16        inter  
##     0.40204      0.13838     -0.05919    152.54358     -6.95711  
## 
## Degrees of Freedom: 464 Total (i.e. Null);  454 Residual
## Null Deviance:       644.5 
## Residual Deviance: 174.5     AIC: 196.5

Building Interactions

# Add one 0/1 indicator column per candidate threshold. Each flag is 1 when the
# comparison holds and 0 otherwise; the comparison's logical result is converted
# to numeric directly. All flags are created in a single mutate() because none
# of them depends on another.
dfInt <- mutate(
  df,
  TaxOver600    = as.numeric(tax >= 600),
  ptOver13      = as.numeric(ptratio > 13),
  lstatOver12   = as.numeric(lstat > 12),
  IndusOver16   = as.numeric(indus >= 16),
  ZnOver0       = as.numeric(zn > 0),
  NoxOverPoint8 = as.numeric(nox >= .8),
  MedvBelow50   = as.numeric(medv < 50)
)

# Fit and report the logistic regression of `target` on the original
# predictors plus all of the indicator columns.
EHModel_Regression_Logistic(dfInt, "target")
## 
## Call:  glm(formula = fla, family = "binomial", data = df)
## 
## Coefficients:
##   (Intercept)             zn          indus           chas            nox  
##    -33.528688      -0.049437      -0.056815       0.964549      36.963196  
##            rm            age            dis            rad            tax  
##     -0.819963       0.038134       1.025748       0.648733       0.001652  
##       ptratio          lstat           medv     TaxOver600       ptOver13  
##      0.518286       0.171361       0.221716      -7.280268      -8.156292  
##   lstatOver12    IndusOver16        ZnOver0  NoxOverPoint8    MedvBelow50  
##     -1.372613       1.804373      -2.409817       7.538132       0.897657  
## 
## Degrees of Freedom: 465 Total (i.e. Null);  446 Residual
## Null Deviance:       645.9 
## Residual Deviance: 172.8     AIC: 212.8
## 
## Call:
## glm(formula = fla, family = "binomial", data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4794  -0.1493  -0.0023   0.0198   4.1463  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -33.528688   9.574531  -3.502 0.000462 ***
## zn             -0.049437   0.068585  -0.721 0.471027    
## indus          -0.056815   0.103823  -0.547 0.584224    
## chas            0.964549   0.846075   1.140 0.254275    
## nox            36.963196   9.333723   3.960 7.49e-05 ***
## rm             -0.819963   0.831087  -0.987 0.323831    
## age             0.038134   0.014766   2.583 0.009805 ** 
## dis             1.025748   0.292165   3.511 0.000447 ***
## rad             0.648733   0.164036   3.955 7.66e-05 ***
## tax             0.001652   0.004013   0.412 0.680551    
## ptratio         0.518286   0.152180   3.406 0.000660 ***
## lstat           0.171361   0.077858   2.201 0.027739 *  
## medv            0.221716   0.085963   2.579 0.009903 ** 
## TaxOver600     -7.280268   4.386502  -1.660 0.096975 .  
## ptOver13       -8.156292   4.564175  -1.787 0.073934 .  
## lstatOver12    -1.372613   0.676083  -2.030 0.042332 *  
## IndusOver16     1.804373   1.467205   1.230 0.218771    
## ZnOver0        -2.409817   1.772536  -1.360 0.173978    
## NoxOverPoint8   7.538132 953.240826   0.008 0.993690    
## MedvBelow50     0.897657   2.107985   0.426 0.670227    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 645.88  on 465  degrees of freedom
## Residual deviance: 172.77  on 446  degrees of freedom
## AIC: 212.77
## 
## Number of Fisher Scoring iterations: 16
## 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 47  6
##          1  2 38
##                                           
##                Accuracy : 0.914           
##                  95% CI : (0.8375, 0.9621)
##     No Information Rate : 0.5269          
##     P-Value [Acc > NIR] : 6.31e-16        
##                                           
##                   Kappa : 0.8267          
##                                           
##  Mcnemar's Test P-Value : 0.2888          
##                                           
##             Sensitivity : 0.9592          
##             Specificity : 0.8636          
##          Pos Pred Value : 0.8868          
##          Neg Pred Value : 0.9500          
##              Prevalence : 0.5269          
##          Detection Rate : 0.5054          
##    Detection Prevalence : 0.5699          
##       Balanced Accuracy : 0.9114          
##                                           
##        'Positive' Class : 0               
## 
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## [1] "AUC:  0.963358070500928"
## 
## Call:
## roc.default(response = dfPred_raw$class, predictor = dfPred_raw$predict_reg,     plot = TRUE)
## 
## Data: dfPred_raw$predict_reg in 49 controls (dfPred_raw$class 0) < 44 cases (dfPred_raw$class 1).
## Area under the curve: 0.9634
## 
## Call:  glm(formula = fla, family = "binomial", data = df)
## 
## Coefficients:
##   (Intercept)             zn          indus           chas            nox  
##    -33.528688      -0.049437      -0.056815       0.964549      36.963196  
##            rm            age            dis            rad            tax  
##     -0.819963       0.038134       1.025748       0.648733       0.001652  
##       ptratio          lstat           medv     TaxOver600       ptOver13  
##      0.518286       0.171361       0.221716      -7.280268      -8.156292  
##   lstatOver12    IndusOver16        ZnOver0  NoxOverPoint8    MedvBelow50  
##     -1.372613       1.804373      -2.409817       7.538132       0.897657  
## 
## Degrees of Freedom: 465 Total (i.e. Null);  446 Residual
## Null Deviance:       645.9 
## Residual Deviance: 172.8     AIC: 212.8
# Draw the interaction scatterplots for one indicator column, a page at a time.
#
# data     : data frame passed through to EHExplore_Interactions_Scatterplots().
# target   : name of the response column (string).
# flag     : name of the 0/1 indicator column to interact with (string).
# per_page : maximum number of plots drawn on one page.
# max_cols : maximum number of columns in the page grid.
#
# Returns the list of plots invisibly so the caller can keep it.
#
# Pages are derived from the number of plots actually returned instead of the
# hard-coded ranges 1:9, 10:18 and 19:20, so nothing is dropped (or indexed
# past the end) if the number of columns in `data` changes. With 20 plots the
# layout is unchanged: two 3x3 pages followed by one 1x2 page.
plot_interaction_pages <- function(data, target, flag,
                                   per_page = 9L, max_cols = 3L) {
  plots <- EHExplore_Interactions_Scatterplots(data, target, flag)

  # Group plot indices into consecutive pages of at most `per_page` plots.
  pages <- split(seq_along(plots), ceiling(seq_along(plots) / per_page))

  for (idx in pages) {
    # Shrink the grid on a short final page instead of leaving empty cells.
    n_cols <- min(max_cols, length(idx))
    n_rows <- ceiling(length(idx) / n_cols)
    # Drawing a page emits one "`geom_smooth()` using formula 'y ~ x'" message
    # per plot.
    grid.arrange(grobs = plots[idx], ncol = n_cols, nrow = n_rows)
  }

  invisible(plots)
}

# Indicator columns to explore, in the order they were originally plotted.
interaction_flags <- c(
  "MedvBelow50", "NoxOverPoint8", "ZnOver0", "IndusOver16",
  "lstatOver12", "ptOver13", "TaxOver600"
)

for (flag in interaction_flags) {
  # `a` keeps the plots of the most recent flag, as in the unrolled version.
  a <- plot_interaction_pages(dfInt, "target", flag)
}

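# Candidate threshold flags (spelled as the dfInt columns where they exist):
# lstatOver12
# IndusOver16
# ptOver13
# ZnOver0
# NoxOverPoint8
# radOver15       (not created in the dfInt pipeline above)
# TaxOver600
# ptratioBelow20  (not created in the dfInt pipeline above)
# MedvBelow50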